function [MDP] = aip(MDP)
%AIP Compute the best action for the received MDP model using active
% inference: variational state estimation (gradient descent on the
% variational free energy), expected-free-energy evaluation of policies,
% and action selection.
%
% Input/Output:
%   MDP - model structure with (at least) fields V, A, B, C, E and one of
%         d/D. On return the following fields are set/updated:
%           MDP.F - per-policy (negative) free energies, one column per t
%           MDP.G - per-policy expected free energies, one column per t
%           MDP.u - selected actions at times 1,...,T-1
%           MDP.s - most probable initial state (after learning)
%           MDP.d - updated concentration parameters (if learning enabled)
%
% NOTE(review): the backward message at j == 1 indexes x(:,j+1,k), so the
% scheme assumes a horizon of at least two time steps (it was written for
% T = 2, i.e. a single transition).

    %% Initialization

    % Useful indexes for dimensions
    % ---------------------------------------------------------------------
    Np = size(MDP.V,2);     % number of allowable policies
    Ns = size(MDP.B{1},1);  % number of hidden states
    Nu = size(MDP.B{1},3);  % number of hidden controls
    No = Ns;                % number of outcomes; we assume we can sense s
    Ni = 4;                 % number of variational (VB) iterations

    T = size(MDP.V,1)+1;    % time horizon (policy depth + 1)

    % (indices of) plausible (allowable) policies
    %----------------------------------------------------------------------
    V = MDP.V;

    % Precision (inverse temperature) for action selection
    alpha = 1;

    % Posterior over policies: one column per time step
    %----------------------------------------------------------------------
    u = zeros(Np,T);

    % Prior over the initial state (single hidden-state factor): prefer
    % learned concentration parameters d, then the fixed prior D, otherwise
    % fall back to a flat prior.
    % ---------------------------------------------------------------------
    if isfield(MDP,'d')
        D = NORM(MDP.d{1});
    elseif isfield(MDP,'D')
        D = NORM(MDP.D{1});
    else
        D = NORM(ones(Ns,1));
        MDP.D{1} = D;   % fixed: original wrote MDP(m).D with undefined m
    end

    % Likelihood matrix: columns are p(o|s)
    A = NORM(MDP.A{1});
    % Transition matrices (forward messages) and transposes (backward)
    B  = zeros(Ns,Ns,Nu);
    bB = zeros(Ns,Ns,Nu);
    for j = 1:Nu
        B(:,:,j)  = NORM(MDP.B{1}(:,:,j));
        bB(:,:,j) = NORM(MDP.B{1}(:,:,j)');
    end
    % Prior preferences over outcomes (log probabilities): C
    C = LOG(SOFTMAX(MDP.C{1}));   % row vector (SOFTMAX returns a row)
    % Prior preference over policies (habits): E
    E  = NORM(MDP.E);
    qE = LOG(E);

    % Observations over the horizon; 0 marks "not yet observed"
    %----------------------------------------------------------------------
    o = zeros(1,T);
    if isfield(MDP,'o')
        o(1) = MDP.o;
    end

    % One-hot outcome history: O(1,:,t) encodes the outcome at time t
    % (fixed: the third dimension must span time T, not the number of
    % outcomes - it is indexed by t below)
    O = zeros(1,No,T);

    % Most probable initial state under the prior
    s = zeros(1,T);
    [~,s(1)] = max(D);

    % Posterior expectations of hidden states, per policy
    %----------------------------------------------------------------------
    x = zeros(Ns,T,Np) + 1/Ns;   % uniform uncertainty over every state
    for k = 1:Np
        x(:,1,k) = D;            % current state initialised to the prior
    end
    % Policy-independent (Bayesian model average) state estimates
    X = zeros(Ns,T);
    X(:,1) = D;

    % Precision over policies (fixed here, could be updated)
    w = ones(1,T);

    %% State estimation

    % Looping over successive time points, up to T
    %======================================================================
    for t = 1:T
        % If the state is not specified, propagate the previous state
        % through the transition selected by the previous action and take
        % the most likely successor (deterministic "sampling" via argmax).
        %------------------------------------------------------------------
        if s(t) == 0
            ps = B(:,s(t-1),MDP.u(t-1));
            [~,s(t)] = max(ps);
        end

        % If the outcome is not specified, take the most likely outcome
        % under the likelihood of the current state.
        %------------------------------------------------------------------
        if ~o(t)
            po = A(:,s(t));
            [~,o(t)] = max(po);
        end
        % Store the observation in one-hot (sparse) form
        O(1,o(t),t) = 1;

        % Variational updates of hidden states under sequential policies
        %==================================================================
        F = zeros(Np,1);   % free energy for each policy

        for k = 1:Np       % loop over the allowable policies
            for i = 1:Ni   % iterate the belief updates
                F(k) = 0;          % reset free energy for this policy
                for j = 1:T        % loop over time points

                    % posterior for this time point and policy
                    %--------------------------------------------------
                    sx = x(:,j,k);
                    qL = zeros(Ns,1);

                    % log-likelihood of outcomes already observed:
                    % qL(s) = log p(o(j)|s).
                    % fixed: use A' (the original used A, selecting a
                    % column p(o|s) instead of the likelihood over states,
                    % as its own comment acknowledged).
                    %--------------------------------------------------
                    if j <= t
                        qL = LOG( A' * O(:,:,j)' );
                    end

                    % current (log) posterior
                    %--------------------------------------------------
                    qx = LOG(sx);

                    % empirical priors and free-energy gradient (vF)
                    %--------------------------------------------------
                    if j == 1   % first time step: backward message (j+1)
                        px = LOG(D) + LOG( bB(:,:,V(k)) * x(:,j+1,k) );
                        vF = px + qL - qx;
                    else        % later time steps: forward message (j-1)
                        px = LOG( B(:,:,V(k)) * x(:,j-1,k) );
                        vF = px + qL - qx;
                    end

                    % (negative) free energy, accumulated WITHOUT the
                    % backward messages
                    %--------------------------------------------------
                    if j == 1
                        FF = -qx + LOG(D) + qL;
                    else
                        FF = -qx + LOG( B(:,:,V(k)) * x(:,j-1,k) ) + qL;
                    end
                    F(k) = F(k) + sx'*FF;

                    % update: zero-gradient fixed point via softmax
                    %--------------------------------------------------
                    sx = SOFTMAX(qx + vF);

                    % store the updated posterior expectation
                    %--------------------------------------------------
                    x(:,j,k) = sx;
                end
            end
        end

        % Accumulate expected free energy of policies (Q)
        %==================================================================
        Q = zeros(Np,1);
        for k = 1:Np
            for j = t:T   % only from the current time until the end T
                % predicted outcomes under this policy, scored against the
                % prior preferences (extrinsic value) plus their entropy
                %----------------------------------------------------------
                qo   = A*x(:,j,k);
                Q(k) = Q(k) + qo'*(C'-LOG(qo));
            end
        end

        % Variational updates - policies and precision
        %==================================================================
        % posterior belief about policies (habits + value + evidence)
        qu      = SOFTMAX(qE + w(t)*Q + F);
        u(:,t)  = qu;   % each column is the posterior over policies at t

        % Bayesian model averaging of hidden states (over policies)
        %------------------------------------------------------------------
        for i = 1:T
            X(:,i) = reshape(x(:,i,:),Ns,Np)*u(:,t);
        end

        % Record (negative) free energies; F(k) is the total accumulated
        % free energy over the horizon under policy k
        %------------------------------------------------------------------
        MDP.F(:,t) = F;
        MDP.G(:,t) = Q;

        % Action selection: u_t = argmax over the policy posterior
        %==================================================================
        if t < T
            % marginal posterior over action (LOG guards against log(0))
            %--------------------------------------------------------------
            Pu = SOFTMAX(alpha*LOG(u(:,t)));
            [~,my_ind] = max(Pu);
            MDP.u(:,t) = my_ind;
        end

        % Terminate evidence accumulation
        %------------------------------------------------------------------
        if t == T
            MDP.u = MDP.u(:,1:T-1);   % actions at 1,...,T-1
        end
    end

    %% Learning - accumulate concentration parameters
    %======================================================================
    % Initial hidden states: fold the model-average posterior over the
    % initial state back into the Dirichlet counts d, at rate kappa_d.
    %----------------------------------------------------------------------
    if isfield(MDP,'d')
        MDP.d{1} = MDP.d{1} + MDP.kappa_d*X(:,1);
        MDP.d{1} = NORM(MDP.d{1});

        % Most probable initial state under the updated prior
        [~,MDP.s] = max(MDP.d{1});
    else
        [~,MDP.s] = max(MDP.D{1});
    end
end

% auxiliary functions
%==========================================================================

function A  = LOG(A)
% Elementwise natural logarithm, offset by a tiny constant so that zero
% probabilities do not produce -Inf (avoids numerical issues downstream)
%--------------------------------------------------------------------------
A  = log(1e-16 + A);
end

function A = NORM(A)
    % Column-wise normalisation of a probability matrix.
    %----------------------------------------------------------------------
    % Each column is rescaled so its elements sum to 1; a column whose sum
    % is not positive is replaced by a uniform distribution.

    nRows = size(A,1);
    for col = 1:size(A,2)
        total = sum(A(:,col));
        if total > 0
            A(:,col) = A(:,col)./total;
        else
            A(:,col) = ones(nRows,1)/nRows;
        end
    end

end

function sigma = SOFTMAX(x)
    % Softmax of a vector x:  sigma = exp(x)/sum(exp(x)).
    %
    % Returns a ROW vector regardless of the orientation of x, matching
    % the original element-by-element implementation (callers such as the
    % preference computation rely on this orientation).
    %
    % Fixes over the original: the result is computed in one vectorised
    % step (the original grew sigma element-by-element and re-evaluated
    % sum(ex) inside the loop), and the maximum is subtracted before
    % exponentiating - mathematically a no-op, but it prevents overflow
    % of exp() for large inputs.

    ex    = exp(x(:) - max(x(:)));   % stabilised exponentials, as a column
    sigma = (ex/sum(ex))';           % normalise and return as a row
end